## Calculating Spillover Information for Analysis

#packages
library(sp)
library(haven)
library(labelled)
library(dplyr)
library(readr)
library(stringr)
library(purrr)

# set-up information
raw_path <- 'data/1_raw/'
clean_path <- 'data/2_clean/'
formatted_path <- 'data/3_formatted/'

# Functions --------------------------------------------------------------------
source("scripts/0_functions/functions_spillover.R")

# Data -------------------------------------------------------------------------

# Load mkt_scoping_samp data
load(paste0(clean_path, "mkt_scoping_sample.RDATA"))

# load market level outcome data
load(paste0(formatted_path, "market_lvl.RDATA"))

#load vendor_end and vendor_base (for ids only, to make ind. lvl. adj. mat.)
load(paste0(clean_path, "vendor_base.RDATA"))
load(paste0(clean_path, "vendor_end.RDATA"))

ids_b <- subset(vendor_base, surveytype2 == "long")[c("id", "market")]
ids_e <- subset(vendor_end, long_survey == 1)[c("resp_id", "market")]

# Load other markets sold data 
o_m_b <- read_csv(paste0(raw_path, "othr_mkts_base.csv"))
o_m_e <- read_csv(paste0(raw_path, "othr_mkts_end.csv"))
## making "n/a" into NA
o_m_b[o_m_b == "n/a"] <- NA
o_m_e[o_m_e == "n/a"] <- NA


#load market name data
market_names <- sort(read_csv(paste0(clean_path,"treatment_groups.csv"))$Market)

#ind lvl adjacency matrix for baseline
adj_b <- make_ind_adj(o_m_b, ids_b) %>% arrange(market, id)
# and for endline
adj_e <- make_ind_adj(o_m_e, ids_e) %>% arrange(market, resp_id)

# Calculating Distance Matrix --------------------------------------------------
dist_mat_miguel <- spDists(as.matrix(mkt_scoping_samp %>% 
                                arrange(mktname) %>% 
                                select(longitude, latitude, ven_daily_wt,
                                       ven_max_wt) %>%
                                na.omit() %>% 
                                select(longitude, latitude)),
                    longlat = T)
miguel_mkts <- !(is.na(mkt_scoping_samp$longitude) | is.na(mkt_scoping_samp$latitude) |
                     is.na(mkt_scoping_samp$ven_daily_wt) |
                     is.na(mkt_scoping_samp$ven_max_wt))

dist_mat_IPW <- spDists(as.matrix(mkt_scoping_samp %>% 
                                       arrange(mktname) %>% 
                                       select(longitude, latitude) %>%
                                       na.omit() %>% 
                                       select(longitude, latitude)),
                           longlat = T)
IPW_mkts <- !(is.na(mkt_scoping_samp$longitude) | is.na(mkt_scoping_samp$latitude))

#what's the max distance someone walked:
##getting number of respondents per market
n_per_unit_bl <- vendor_base %>% filter(surveytype2 == "long", !is.na(sell_in_othr_mkts)) %>% 
  arrange(market) %>% group_by(market) %>% summarise(n = n()) %>% pull(n)
n_per_unit_el <- vendor_end %>% filter(long_survey == 1) %>% 
  arrange(market) %>% group_by(market) %>% summarise(n = n()) %>% pull(n)

#elementwise multiplying ind lvl adj mat with expanded distance mat
distances_traveled <- c(as.matrix(adj_b[rep(IPW_mkts, 
                                            times = n_per_unit_bl),2:129][, IPW_mkts])*
  as.matrix(data.frame(dist_mat_IPW[rep(1:127, times = n_per_unit_bl[IPW_mkts]),])))
distances_traveled <- distances_traveled[distances_traveled != 0]


# Miguel Spillovers ------------------------------------------------------------

#### Running Miguel Spilloevers
# missing location data for one market, need to ignore it for calculations
na_id <- !(is.na(mkt_scoping_samp$longitude) | is.na(mkt_scoping_samp$ven_daily_wt) |
             is.na(mkt_scoping_samp$ven_max_wt))

spills_Miguel <- get_ring_tots(dist_mat_miguel, c(2, 5, 10), 
                               mkt_scoping_samp$treatment_status[na_id],
                               list(ven_max_wt = mkt_scoping_samp$ven_max_wt[na_id],
                                    ven_daily_wt = mkt_scoping_samp$ven_daily_wt[na_id],
                                    number = rep(1, sum(na_id))),
                               mkt_scoping_samp$mktname[na_id])

# IPW Spillovers ---------------------------------------------------------------

# Set Up for IPW
set.seed(346)

#get blocks information
blocks <- read_dta(paste0(raw_path, "block_ids.dta")) %>% 
  mutate_if(is.labelled, to_factor) %>%
  mutate(market = as.character(market)) %>% 
  arrange(market)

blocks$mktname <- (mkt_scoping_samp %>% arrange(mktname))$mktname

#matrix of all possible condition combinations
conds <- expand.grid(c("000", "100", "010", "001"),
                     c("000", "100", "010", "001",
                       "101", "110", "011", "111"))
#creating a vector of possible conditions
conds <- apply(conds, 1, 
               paste0, sep = "", collapse = "_")

distances <- c(2, 5, 10)

treat_conds_mktlvl <- ((market_lvl %>%
                          filter(Endline == 1))[, c("BU", "TD", 
                                                    "Both")] %>% 
                         as.matrix())

#### Running IPW Spillovers Using Distances Only
spills_IPW <- get_spillIPW(conds, distances, dist_mat_IPW, treat_conds_mktlvl,
                           subset_unit = IPW_mkts,
                           blocks = blocks$block_id, N_within = 4,
                           treat_m = 4, sim = 10000,
                           id_vec = mkt_scoping_samp$mktname)


#### Running IPW Spillovers Using Baseline Data Only
#making distance matrix with distances all over 1 km
dist_mat_0 <- matrix(5, ncol = 127, nrow = 127)
diag(dist_mat_0) <- 0
spills_IPW_ind_bl <- get_spillIPW(conds, 1, dist_mat_0, 
                                 treat_conds_mktlvl,
                                 blocks = blocks$block_id, N_within = 4,
                                 treat_m = 4, sim = 10000,
                                 subset_unit = IPW_mkts,
                                 id_vec = vendor_base %>% 
                                   arrange(market, id) %>% 
                                   filter(surveytype2 == "long") %>% 
                                   pull(market), ind_lvl = T,
                                 n_per_unit = n_per_unit_bl, 
                                 ind_adj = as.matrix(adj_b[,2:129]))

#### Running IPW Spillovers Combining Baseline Data and Distance Data
spills_IPW_mixed_bl <- get_spillIPW(conds, distances, dist_mat_IPW, 
                                 treat_conds_mktlvl,
                                 blocks = blocks$block_id, N_within = 4,
                                 treat_m = 4, sim = 10000,
                                 subset_unit = IPW_mkts,
                                 id_vec = vendor_base %>% 
                                   arrange(market, id) %>% 
                                   filter(surveytype2 == "long") %>% 
                                   pull(market), ind_lvl = T,
                                 n_per_unit = n_per_unit_bl, 
                                 ind_adj = as.matrix(adj_b[,2:129]))

#### Endline Spillover Probability Estimates
spills_IPW_ind_el <- get_spillIPW(conds, 1, dist_mat_0, 
                                  treat_conds_mktlvl,
                                  blocks = blocks$block_id, N_within = 4,
                                  treat_m = 4, sim = 10000,
                                  subset_unit = IPW_mkts,
                                  id_vec = vendor_end %>% 
                                    arrange(market, resp_id) %>% 
                                    filter(long_survey == 1) %>% 
                                    pull(market), ind_lvl = T,
                                  n_per_unit = n_per_unit_el, 
                                  ind_adj = as.matrix(adj_e[,2:129]))

spills_IPW_mixed_el <- get_spillIPW(conds, distances, dist_mat_IPW, 
                                    treat_conds_mktlvl,
                                    blocks = blocks$block_id, N_within = 4,
                                    treat_m = 4, sim = 10000,
                                    subset_unit = IPW_mkts,
                                    id_vec = vendor_end %>% 
                                      arrange(market, resp_id) %>% 
                                      filter(long_survey == 1) %>% 
                                      pull(market), ind_lvl = T,
                                    n_per_unit = n_per_unit_el, 
                                    ind_adj = as.matrix(adj_e[,2:129]))



#### Creating IPW Mkt Lvl to Merge with Endline (*from Baseline*)

#getting modal outcomes per market
spills_IPW_ind_bl$mkt_mod_cond_d1 <- ave(spills_IPW_ind_bl$cond_d1, 
                                         spills_IPW_ind_bl$id,
                                      FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_bl$mkt_mod_cond_d2 <- ave(spills_IPW_mixed_bl$cond_d2, 
                                           spills_IPW_mixed_bl$id,
                                      FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_bl$mkt_mod_cond_d5 <- ave(spills_IPW_mixed_bl$cond_d5, 
                                           spills_IPW_mixed_bl$id,
                                      FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_bl$mkt_mod_cond_d10 <- ave(spills_IPW_mixed_bl$cond_d10, 
                                            spills_IPW_mixed_bl$id,
                                      FUN = function(x) getmode(x, na.rm = T)) 

#getting probabilities of modal outcome for each respondent
spills_IPW_ind_bl$mkt_mod_prob_d1 <- get_prob(spills_IPW_ind_bl[, 1:32], 
                                           spills_IPW_ind_bl$mkt_mod_cond_d1,
                                           conds)
spills_IPW_mixed_bl$mkt_mod_prob_d2 <- get_prob(spills_IPW_mixed_bl[, 1:32], 
                                           spills_IPW_mixed_bl$mkt_mod_cond_d2,
                                           conds)
spills_IPW_mixed_bl$mkt_mod_prob_d5 <- get_prob(spills_IPW_mixed_bl[, 33:64], 
                                             spills_IPW_mixed_bl$mkt_mod_cond_d5,
                                             conds)
spills_IPW_mixed_bl$mkt_mod_prob_d10 <- get_prob(spills_IPW_mixed_bl[, 65:96], 
                                             spills_IPW_mixed_bl$mkt_mod_cond_d10,
                                             conds)

#getting market level averages
spills_IPW_ind_mktlvl_bl <- spills_IPW_ind_bl %>% group_by(id) %>% 
  summarise(prob_d1_ind = mean(mkt_mod_prob_d1),
            cond_d1_ind = unique(mkt_mod_cond_d1),
            d1_000_000_ind = mean(d1_000_000), 
            d1_100_000_ind = mean(d1_100_000),
            d1_010_000_ind = mean(d1_010_000),
            d1_001_000_ind = mean(d1_001_000))
#making a column that represents the average probability of a market being in
#the treatment condition to which it was **assigned** (i.e. not the condition 
#in which it actually finds itself, which could be a spillover condition)
#getting columns of assigned treatments
treat_assigned <- apply(treat_conds_mktlvl, 1, 
                        function(x) paste(x, sep = "", collapse = ""))
treat_assigned <- sapply(treat_assigned, 
                      function(x) paste(x, "000", sep = "_", collapse = ""))
poss_treat <- unique(treat_assigned)
spills_IPW_ind_mktlvl_bl$prob_d1_ind_main_only <- get_prob(spills_IPW_ind_mktlvl_bl[, 4:7],
                                                           treat_assigned[IPW_mkts],
                                                           poss_treat)

spills_IPW_mixed_mktlvl_bl <- spills_IPW_mixed_bl %>% group_by(id) %>% 
  summarise(prob_d2_mix = mean(mkt_mod_prob_d2),
            cond_d2_mix = unique(mkt_mod_cond_d2),
            prob_d5_mix = mean(mkt_mod_prob_d5),
            cond_d5_mix = unique(mkt_mod_cond_d5),
            prob_d10_mix = mean(mkt_mod_prob_d10),
            cond_d10_mix = unique(mkt_mod_cond_d10),
            d2_000_000_mix = mean(d2_000_000), 
            d2_100_000_mix = mean(d2_100_000),
            d2_010_000_mix = mean(d2_010_000),
            d2_001_000_mix = mean(d2_001_000),
            d5_000_000_mix = mean(d5_000_000), 
            d5_100_000_mix = mean(d5_100_000),
            d5_010_000_mix = mean(d5_010_000),
            d5_001_000_mix = mean(d5_001_000),
            d10_000_000_mix = mean(d10_000_000), 
            d10_100_000_mix = mean(d10_100_000),
            d10_010_000_mix = mean(d10_010_000),
            d10_001_000_mix = mean(d10_001_000))

#### Getting IPW at Mkt Lvl using Endline (for Additional Analysis)
#getting modal outcomes per market
spills_IPW_ind_el$mkt_mod_cond_d1 <- ave(spills_IPW_ind_el$cond_d1, 
                                         spills_IPW_ind_el$id,
                                         FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_el$mkt_mod_cond_d2 <- ave(spills_IPW_mixed_el$cond_d2, 
                                           spills_IPW_mixed_el$id,
                                           FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_el$mkt_mod_cond_d5 <- ave(spills_IPW_mixed_el$cond_d5, 
                                           spills_IPW_mixed_el$id,
                                           FUN = function(x) getmode(x, na.rm = T)) 
spills_IPW_mixed_el$mkt_mod_cond_d10 <- ave(spills_IPW_mixed_el$cond_d10, 
                                            spills_IPW_mixed_el$id,
                                            FUN = function(x) getmode(x, na.rm = T)) 

#getting probabilities of modal outcome for each respondent
spills_IPW_ind_el$mkt_mod_prob_d1 <- get_prob(spills_IPW_ind_el[, 1:32], 
                                              spills_IPW_ind_el$mkt_mod_cond_d1,
                                              conds)
spills_IPW_mixed_el$mkt_mod_prob_d2 <- get_prob(spills_IPW_mixed_el[, 1:32], 
                                                spills_IPW_mixed_el$mkt_mod_cond_d2,
                                                conds)
spills_IPW_mixed_el$mkt_mod_prob_d5 <- get_prob(spills_IPW_mixed_el[, 33:64], 
                                                spills_IPW_mixed_el$mkt_mod_cond_d5,
                                                conds)
spills_IPW_mixed_el$mkt_mod_prob_d10 <- get_prob(spills_IPW_mixed_el[, 65:96], 
                                                 spills_IPW_mixed_el$mkt_mod_cond_d10,
                                                 conds)

#getting market level averages
spills_IPW_ind_mktlvl_el <- spills_IPW_ind_el %>% group_by(id) %>% 
  summarise(prob_d1_ind = mean(mkt_mod_prob_d1),
            cond_d1_ind = unique(mkt_mod_cond_d1),
            d1_000_000_ind = mean(d1_000_000), 
            d1_100_000_ind = mean(d1_100_000),
            d1_010_000_ind = mean(d1_010_000),
            d1_001_000_ind = mean(d1_001_000))
#making a column that represents the average probability of a market being in
#the treatment condition to which it was **assigned** (i.e. not the condition 
#in which it actually finds itself, which could be a spillover condition)
spills_IPW_ind_mktlvl_el$prob_d1_ind_main_only <- get_prob(spills_IPW_ind_mktlvl_el[, 4:7],
                                                           treat_assigned[IPW_mkts],
                                                           poss_treat)

spills_IPW_mixed_mktlvl_el <- spills_IPW_mixed_el %>% group_by(id) %>% 
  summarise(prob_d2_mix = mean(mkt_mod_prob_d2),
            cond_d2_mix = unique(mkt_mod_cond_d2),
            prob_d5_mix = mean(mkt_mod_prob_d5),
            cond_d5_mix = unique(mkt_mod_cond_d5),
            prob_d10_mix = mean(mkt_mod_prob_d10),
            cond_d10_mix = unique(mkt_mod_cond_d10),
            d2_000_000_mix = mean(d2_000_000), 
            d2_100_000_mix = mean(d2_100_000),
            d2_010_000_mix = mean(d2_010_000),
            d2_001_000_mix = mean(d2_001_000),
            d5_000_000_mix = mean(d5_000_000), 
            d5_100_000_mix = mean(d5_100_000),
            d5_010_000_mix = mean(d5_010_000),
            d5_001_000_mix = mean(d5_001_000),
            d10_000_000_mix = mean(d10_000_000), 
            d10_100_000_mix = mean(d10_100_000),
            d10_010_000_mix = mean(d10_010_000),
            d10_001_000_mix = mean(d10_001_000))

#changing colnames to help with merging later
names(spills_IPW_ind_mktlvl_el) <- sapply(names(spills_IPW_ind_mktlvl_el),
                                          function(x){
                                            paste0(x, "_el")
                                          })

# Saving -----------------------------------------------------------------------
save(spills_IPW, spills_IPW_ind_bl, spills_IPW_mixed_bl, 
     spills_Miguel, spills_IPW_ind_mktlvl_bl, spills_IPW_mixed_mktlvl_bl,
     spills_IPW_ind_mktlvl_el, spills_IPW_mixed_mktlvl_el, 
     adj_b, adj_e,
     file = paste0(formatted_path, "spillovers.RDATA"))
